# -*- coding: utf-8 -*-
import scrapy

from baidu import items
import json

class BaiduyoujiSpider(scrapy.Spider):
    """Crawl lvyou.baidu.com: list entries from the scene page, then notes.

    Callback chain:

    * ``parse`` reads the scene page, emits one ``BaiduMDD`` item per
      listed link and starts a search for every link of the first list.
    * ``parseYoujis`` handles the first search response and schedules the
      remaining result pages.
    * ``parseYoujiForPage`` handles each remaining result page.
    * ``getYoujiContent`` turns a note detail page into a ``BaiduYouji``
      item.

    NOTE(review): "mdd" presumably stands for "destination" and "youji"
    for "travel note"; confirm against the item definitions.
    """

    name = "baiduyouji"
    allowed_domains = ["lvyou.baidu.com"]
    start_urls = ["http://lvyou.baidu.com/scene/"]

    # Search endpoint; the single placeholder receives the search word.
    # NOTE(review): the literal "8" directly after the placeholder is sent
    # as part of the search word. It looks like a typo but is kept as-is
    # because changing it changes what is crawled; confirm and remove.
    _SEARCH_URL = "http://lvyou.baidu.com/search/ajax/search?format=ajax&word=%s8&rn=10"
    # Same endpoint with an explicit "pn" value as second placeholder.
    _SEARCH_PAGE_URL = _SEARCH_URL + "&pn=%s"

    def getYoujiContent(self, response):
        """Yield one ``BaiduYouji`` item built from a note detail page.

        The content is the concatenation of all text nodes found up to two
        element levels below the ``<p>`` children of the ``html-content``
        div. ``cname`` and ``title`` are read from ``response.meta``.
        """
        fragments = response.selector.xpath(
            '//div[@class="html-content"]/p/text()'
            '|//div[@class="html-content"]/p/*/text()'
            '|//div[@class="html-content"]/p/*/*/text()'
        ).extract()
        youji = items.BaiduYouji()
        youji['mdd'] = response.meta['cname']
        youji['url'] = response.url
        youji['title'] = response.meta['title']
        youji['content'] = "".join(fragments)
        yield youji

    def _note_requests(self, res, meta):
        """Yield a detail-page request for every note in a search result.

        ``res`` is the decoded JSON body of a search response; ``meta`` is
        the meta dict of that response and must contain ``cname``.
        """
        for note in res['data']['search_res']['notes_list']:
            yield scrapy.Request(
                note['loc'] + "/d",
                callback=self.getYoujiContent,
                meta={'cname': meta['cname'], 'title': note['title']}
            )

    def parseYoujiForPage(self, response):
        """Handle one follow-up search page: request every listed note."""
        res = json.loads(response.body)
        for request in self._note_requests(res, response.meta):
            yield request

    def parseYoujis(self, response):
        """Handle the first search page and schedule the remaining pages.

        Requests every note listed in this response, then one search
        request per further "pn" value derived from the response's
        ``pagelist``.
        """
        res = json.loads(response.body)
        for request in self._note_requests(res, response.meta):
            yield request

        # An absent or empty page list means there is nothing further to
        # schedule; indexing it blindly would raise instead.
        pagelist = res['data']['search_res'].get('pagelist') or []
        if not pagelist:
            self.log("no pagelist in %s; not requesting further pages" % response.url)
            return

        cname = response.meta['cname']
        pgnum = pagelist[-1][0]
        # NOTE(review): the first follow-up value is 20, so pn=10 is never
        # requested. Confirm whether the initial response already covers
        # it; kept as-is because the API contract is not visible here.
        for p in range(20, pgnum + 10, 10):
            yield scrapy.Request(
                self._SEARCH_PAGE_URL % (cname, p),
                callback=self.parseYoujiForPage,
                meta=response.meta
            )

    def _build_mdd(self, anchor):
        """Return a ``BaiduMDD`` item holding the anchor's text and href.

        Both fields are stored as the lists returned by ``extract()``.
        """
        mdd = items.BaiduMDD()
        mdd['cname'] = anchor.xpath('text()').extract()
        mdd['url'] = anchor.xpath('@href').extract()
        return mdd

    def parse(self, response):
        """Emit items for both link lists and start searches for the first.

        Links in the fourth ``section`` of ``#body`` yield an item followed
        by a search request for the link text; links in the fifth
        ``section`` yield an item only.
        """
        for anchor in response.selector.xpath('//*[@id="body"]/section[4]/ul/li/p[2]/a'):
            mdd = self._build_mdd(anchor)
            yield mdd
            names = mdd['cname']
            if not names:
                # No text node: still report the item, but there is no
                # word to search for.
                self.log("link without text on %s; search skipped" % response.url)
                continue
            cname = names[0]
            yield scrapy.Request(
                self._SEARCH_URL % cname,
                callback=self.parseYoujis,
                meta={'cname': cname}
            )

        for anchor in response.selector.xpath('//*[@id="body"]/section[5]/ul/li/p[2]/a'):
            yield self._build_mdd(anchor)

        # TODO: also crawl http://lvyou.baidu.com/notes; no callback for
        # that page exists in this spider yet.
